Soil Paper Figures

Loading Required Libraries

library(tidyverse)
library(stringr)
library(dplyr)
library(phyloseq)
library(zCompositions)
library(vegan)
library(ggforce)
library(ggpicrust2)
library(ggvegan)
library(microbiome)
library(scales)
library(RColorBrewer)
library(pheatmap)
library(microeco)
library(gridExtra)
library(cowplot)

Taxanomic Analysis

Load Data

samdf <- read.delim("~/BecketLab_R/Soil/Soil Paper/soil_metadata_2.txt", sep = "\t", header = T, stringsAsFactors=FALSE)
load("~/BecketLab_R/Soil/Soil Paper/Singlem_df.RData")

Create Phyloseq Object

# Convert singlem into phyloseq object 
abundance_data <- joined_data[, 1:40]
taxonomy_data <- joined_data[, c("Root", "Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species")]

rownames(samdf) <- colnames(abundance_data)

s_ps <- phyloseq(
  otu_table(as.matrix(abundance_data), taxa_are_rows = TRUE),
  tax_table(as.matrix(taxonomy_data)),
  sample_data(samdf)
)

Create Microtable object (microeco)

tidy_taxa <- tidy_taxonomy(
  taxonomy_data
)
soil_microtable <- microtable$new(
  abundance_data,
  sample_table = samdf,
  tax_table = tidy_taxa,
  phylo_tree = NULL,
  rep_fasta = NULL,
  auto_tidy = FALSE
)

Generate Colors

n <- 47
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
col_vector
##  [1] "#7FC97F" "#BEAED4" "#FDC086" "#FFFF99" "#386CB0" "#F0027F" "#BF5B17"
##  [8] "#666666" "#1B9E77" "#D95F02" "#7570B3" "#E7298A" "#66A61E" "#E6AB02"
## [15] "#A6761D" "#666666" "#A6CEE3" "#1F78B4" "#B2DF8A" "#33A02C" "#FB9A99"
## [22] "#E31A1C" "#FDBF6F" "#FF7F00" "#CAB2D6" "#6A3D9A" "#FFFF99" "#B15928"
## [29] "#FBB4AE" "#B3CDE3" "#CCEBC5" "#DECBE4" "#FED9A6" "#FFFFCC" "#E5D8BD"
## [36] "#FDDAEC" "#F2F2F2" "#B3E2CD" "#FDCDAC" "#CBD5E8" "#F4CAE4" "#E6F5C9"
## [43] "#FFF2AE" "#F1E2CC" "#CCCCCC" "#E41A1C" "#377EB8" "#4DAF4A" "#984EA3"
## [50] "#FF7F00" "#FFFF33" "#A65628" "#F781BF" "#999999" "#66C2A5" "#FC8D62"
## [57] "#8DA0CB" "#E78AC3" "#A6D854" "#FFD92F" "#E5C494" "#B3B3B3" "#8DD3C7"
## [64] "#FFFFB3" "#BEBADA" "#FB8072" "#80B1D3" "#FDB462" "#B3DE69" "#FCCDE5"
## [71] "#D9D9D9" "#BC80BD" "#CCEBC5" "#FFED6F"

Compositional Bar Chart: Phylum

# Phylum
t1 <- trans_abund$new(dataset = soil_microtable, taxrank = "Phylum", ntaxa = 10)
t1$data_abund$Land.cov <- factor(t1$data_abund$Land.cov, levels = c("C","O","A", "L", "IC", "IA"))
# Class
t2 <- trans_abund$new(dataset = soil_microtable, taxrank = "Class", ntaxa = 15)
t2$data_abund$Land.cov <- factor(t2$data_abund$Land.cov, levels = c("C","O","A", "L", "IC", "IA"))
# Genus
t3 <- trans_abund$new(dataset = soil_microtable, taxrank = "Family", ntaxa = 25)
t3$data_abund$Land.cov <- factor(t3$data_abund$Land.cov, levels = c("C","O","A", "L", "IC", "IA"))






#################

g1 <- t1$plot_bar(bar_full = TRUE, others_color = "grey70", facet = "Land.cov",legend_text_italic = FALSE,clustering = TRUE,  x_axis_name = "site_id", color_values = RColorBrewer::brewer.pal(10, "Paired")) + theme_classic() + theme(text=element_text(size=18), axis.text = element_text(size = 12))
g1

g2 <- t2$plot_bar(bar_full = TRUE, others_color = "grey70", facet = "Land.cov",legend_text_italic = FALSE,clustering = TRUE,  x_axis_name = "site_id", color_values = RColorBrewer::brewer.pal(10, "Paired")) + theme_classic() + theme(text=element_text(size=18), axis.text = element_text(size = 12))
g2

g3 <- t3$plot_bar(bar_full = TRUE, others_color = "grey70", facet = "Land.cov",legend_text_italic = FALSE,clustering = TRUE,  x_axis_name = "site_id", color_values = RColorBrewer::brewer.pal(10, "Paired")) + theme_classic() + theme(text=element_text(size=18), axis.text = element_text(size = 12)) + guides(fill=guide_legend(ncol=1,title = "Family"))
g3

plot_grid(g1, g2, g3, ncol = 1, align = 'v', labels = "AUTO")

#ggsave("~/BecketLab_R/Soil/Profiling/data/taxa_compositional_bar.svg", width = 15, height = 20, limitsize = FALSE)

Species: Alpha Diversity

s_ps@sam_data$Land.cov <- factor(s_ps@sam_data$Land.cov, levels = c("C","O","A", "L", "IC", "IA"))

plot_richness(s_ps, x = "Land.cov", measures = c("Shannon", "Simpson"))+
  geom_boxplot(fill = "white", color = "black") +
  geom_point(aes(color = factor(site_id))) +
  geom_jitter(aes(color = factor(site_id))) + 
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5),
  ) 

#ggsave("~/BecketLab_R/Soil/Soil Paper/output/taxa_alpha.pdf", width = 6, height = 8, limitsize = FALSE)

Beta diversity Preprocessing - Species

otu_z <- cmultRepl(as.matrix(s_ps@otu_table), method = "GBM", output = 'p-counts', z.warning = 0.99)

#create clr function
clr <- function(x) sweep(log(x), 1, rowMeans(log(x)), "-")

#Transpose OTU tables
otu_tx <- data.frame(t(clr(t(otu_z))))

#To matrix
otu_m <- as.matrix(t(otu_tx))

######################### Make and extract NMDS scores 
nmds = metaMDS(otu_m, distance = "euclidean")
nmds_scores = as.data.frame(nmds$points)

nmds_scores$pH <- samdf$pH
nmds_scores$Soil_H20 <- samdf$Soil_H2O
nmds_scores$Bulk.D <- samdf$Bulk.D
nmds_scores$NO3 <- samdf$NO3
nmds_scores$NH4 <- samdf$NH4
nmds_scores$LOI <- samdf$LOI
nmds_scores$N <- samdf$N....
nmds_scores$C <- samdf$C....
nmds_scores$site_id <- samdf$site_id
nmds_scores$soil_type <- samdf$Land.cov

#nmds_scores$Samples <- samples.out
colnames(nmds_scores) <- c("NMDS1", "NMDS2", "pH", "Soil_H20", "Bulk.D", "NO3", "NH4", "LOI", "N", "C", "site_id", "soil_type")

Plot Beta

nmds_scores$soil_type <- factor(nmds_scores$soil_type, levels = c("C","O","A", "L", "IC", "IA"))

beta_plot <- ggplot(data = nmds_scores) +
  ggtitle("Beta-diversity by Soil Type") +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_mark_ellipse(aes(x=NMDS1,y=NMDS2,fill= soil_type, color= soil_type), expand = unit(0.5,"mm")) +
  geom_point(aes(x=NMDS1, y=NMDS2, shape = soil_type)) +
  labs(fill = "Land Cover Type", shape = "Land Cover Type", color = "Land Cover Type")

beta_plot

#ggsave("~/BecketLab_R/Soil/Soil Paper/output/taxa_beta.pdf", width = 8, height = 5, limitsize = FALSE)

Beta Stats (Anosim + PERMANOVA)

anosim(otu_m, samdf$Land.cov, distance = "euclidean", permutations = 999)
## 
## Call:
## anosim(x = otu_m, grouping = samdf$Land.cov, permutations = 999,      distance = "euclidean") 
## Dissimilarity: euclidean 
## 
## ANOSIM statistic R: 0.3794 
##       Significance: 0.001 
## 
## Permutation: free
## Number of permutations: 999
adonis2(formula = otu_m ~ samdf$Land.cov, permutations = 999, method = "euclidean")
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = otu_m ~ samdf$Land.cov, permutations = 999, method = "euclidean")
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     5    51552 0.27586 2.5905  0.001 ***
## Residual 34   135326 0.72414                  
## Total    39   186878 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

CCA Preprocessing - species

# CCA requires two dataframes:
# 1st main Data (rows: samples columns: factors[phylum or OTU]) 
# 2nd Driver of the dataset ex. any continuous counts (rain.Inches)

# subset the data to only preserve main data ONLY
# create a list of colnames bc rmd 
cca_driver <- subset(nmds_scores, select = c("pH", "Soil_H20", "Bulk.D", "NO3", "NH4", "LOI", "N", "C"))
data_ps <- data.frame(t(s_ps@otu_table))
cca_ps <- cca(data_ps, cca_driver)

#plot(cca_ps)
fort_cca <- fortify(cca_ps)
# sites and biplot 

sites <- which(fort_cca$score == "sites")
biplot <- which(fort_cca$score == "biplot")

fort_cca1 <- fort_cca[sites,]
drivers <- fort_cca[biplot,]

gg_soil_cca <- cbind(fort_cca1, subset(samdf, select = c("Land.cov")))

Plot CCA - species

gg_soil_cca$Land.cov <- factor(gg_soil_cca$Land.cov, levels = c("C","O","A", "L", "IC", "IA"))

cca_gg <- ggplot(gg_soil_cca) +
  ggtitle("CCA1 vs CCA2") +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_point(aes(x = CCA1, y = CCA2 , color = factor(Land.cov), shape = Land.cov)) +
  geom_segment(data = drivers, aes(x = 0, y = 0, xend = 3*CCA1, yend = 3*CCA2), arrow = arrow(length = unit(0.1, "cm")), linewidth =0.5) +
  geom_text(data = drivers, aes(x = 3.3*CCA1, y = 3.3*CCA2, label = label)) +
  labs(color = "Land Cover Type", shape = "Land Cover Type")

cca_gg

#ggsave("~/BecketLab_R/Soil/Soil Paper/output/taxa_cca.pdf", width = 8, height = 5, limitsize = FALSE)

LEfSe

default_colors <- scales::hue_pal()(6)

t1 <- trans_diff$new(dataset = soil_microtable, method = "lefse", group = "Land.cov")
g1 <- t1$plot_diff_bar(color_values = default_colors,use_number = 1:20)


g1$labels$colour <- "Land Cover Type"
g1$labels$fill <- "Land Cover Type"
g1$labels$group <- "Land Cover Type"


g2 <- t1$plot_diff_abund(color_values = default_colors,use_number = 1:20,  add_sig = TRUE)
g2 <- g2 + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank(), panel.border = element_blank())

g2$labels$colour <- "Land Cover Type"
p <- g1 %>% aplot::insert_right(g2, width=3)
p

g1 <- g1 + theme(text=element_text(size=18), axis.text.y = element_text(size = 20))
g2 <- g2 + theme(text=element_text(size=18), axis.text = element_text(size = 12))

p <- g1 %>% aplot::insert_right(g2, width=5)
p

#ggsave("~/BecketLab_R/Soil/Profiling/data/taxa_lefse.svg", width = 20, height = 15, limitsize = FALSE)

Pearson Correlation

t2 <- trans_diff$new(dataset = soil_microtable, method = "lefse", group = "Land.cov", taxa_level = "all")
# then create trans_env object
t1 <- trans_env$new(dataset = soil_microtable, add_data = soil_microtable$sample_table[,c(2:9)])
# use other_taxa to select taxa you need
t1$cal_cor(use_data = "other", p_adjust_method = "fdr", other_taxa = t2$res_diff$Taxa[1:40])


t1$plot_cor(pheatmap = FALSE)

#ggsave("~/BecketLab_R/Soil/Soil Paper/output/taxa_pearsonCor.pdf", width = 10, height = 5, limitsize = FALSE)

Testing

#t1$cal_cor(use_data = "other", p_adjust_method = "fdr", other_taxa = t2$res_diff$Taxa[1:10])

#t1$plot_cor(pheatmap = FALSE)

Functional Analysis

Load Data

load("~/BecketLab_R/Soil/Functional/2023/kegg_funprofiler_df.RData")

Create Phyloseq Object

# Build a phyloseq object 
brite_abudance_table <- data.frame(funprofiler_data_wpathways[,2:41])
brite_taxa_df <- as.matrix(funprofiler_data_wpathways[,c(46,45,44)])
row.names(samdf) <- colnames(brite_abudance_table)

brite_ps <- phyloseq(
  otu_table(as.matrix(brite_abudance_table), taxa_are_rows = TRUE),
  tax_table(as.matrix(brite_taxa_df)),
  sample_data(samdf)
)

Create Microtable Object

soil_func_microtable <- microtable$new(
  brite_abudance_table,
  sample_table = samdf,
  tax_table = data.frame(brite_taxa_df),
  phylo_tree = NULL,
  rep_fasta = NULL,
  auto_tidy = TRUE
)

Composititional Bar Chart

# A Level
t3 <- trans_abund$new(dataset = soil_func_microtable, taxrank = "A", ntaxa = 10)
t3$data_abund$Land.cov <- factor(t3$data_abund$Land.cov, levels = c("C","O","A", "L", "IC", "IA"))
# B Level
t4 <- trans_abund$new(dataset = soil_func_microtable, taxrank = "B", ntaxa = 10)
t4$data_abund$Land.cov <- factor(t4$data_abund$Land.cov, levels = c("C","O","A", "L", "IC", "IA"))
# C Level
t5 <- trans_abund$new(dataset = soil_func_microtable, taxrank = "kegg_pathway_name", ntaxa = 30)
t5$data_abund$Land.cov <- factor(t5$data_abund$Land.cov, levels = c("C","O","A", "L", "IC", "IA"))


t3 <- t3$plot_bar(bar_full = TRUE, others_color = "grey70", facet = "Land.cov",legend_text_italic = FALSE,clustering = TRUE,  x_axis_name = "site_id", color_values = RColorBrewer::brewer.pal(10, "Paired")) + theme_classic() + theme(text=element_text(size=18), axis.text = element_text(size = 12))

t4 <- t4$plot_bar(bar_full = TRUE, others_color = "grey70", facet = "Land.cov",legend_text_italic = FALSE,clustering = TRUE,  x_axis_name = "site_id", color_values = RColorBrewer::brewer.pal(10, "Paired")) + theme_classic() + theme(text=element_text(size=18), axis.text = element_text(size = 12))

t5 <- t5$plot_bar(bar_full = TRUE, others_color = "grey70", facet = "Land.cov",legend_text_italic = FALSE,clustering = TRUE,  x_axis_name = "site_id", color_values = RColorBrewer::brewer.pal(10, "Paired")) + theme_classic() + theme(text=element_text(size=18), axis.text = element_text(size = 12))

t5

plot_grid(t3, t4, t5, ncol = 1, align = 'v', labels = "AUTO")

t3

ggsave("~/BecketLab_R/Soil/Profiling/data/func_compositional_bar.png", width = 15, height = 5, limitsize = FALSE)

Pathway Alpha Diversity

brite_ps@sam_data$Land.cov <- factor(brite_ps@sam_data$Land.cov, levels = c("C","O","A", "L", "IC", "IA"))

plot_richness(brite_ps, x = "Land.cov", measures = c("Shannon", "Simpson"))+
  geom_boxplot(fill = "white", color = "black") +
  geom_point(aes(color = factor(site_id))) +
  geom_jitter(aes(color = factor(site_id))) + 
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5),
  ) 

#ggsave("~/BecketLab_R/Soil/Soil Paper/output/func_alpha.pdf", width = 6, height = 8, limitsize = FALSE)

Beta diversity Processing

otu_z<- cmultRepl(as.matrix(brite_ps@otu_table), method = "GBM", output = 'p-counts', z.warning = 0.99)

#create clr function
clr <- function(x) sweep(log(x), 1, rowMeans(log(x)), "-")

#Transpose OTU tables
otu_tx <- data.frame(t(clr(t(otu_z))))

#To matrix
otu_m <- as.matrix(t(otu_tx))

######################### Make and extract NMDS scores 
nmds = metaMDS(otu_m, distance = "euclidean")
nmds_scores = as.data.frame(nmds$points)

nmds_scores$pH <- samdf$pH
nmds_scores$Soil_H20 <- samdf$Soil_H2O
nmds_scores$Bulk.D <- samdf$Bulk.D
nmds_scores$NO3 <- samdf$NO3
nmds_scores$NH4 <- samdf$NH4
nmds_scores$LOI <- samdf$LOI
nmds_scores$N <- samdf$N....
nmds_scores$C <- samdf$C....
nmds_scores$site_id <- samdf$site_id
nmds_scores$soil_type <- samdf$Land.cov


colnames(nmds_scores) <- c("NMDS1", "NMDS2", "pH", "Soil_H20", "Bulk.D", "NO3", "NH4", "LOI", "N", "C", "site_id", "soil_type")


nmds_scores$soil_type <- factor(nmds_scores$soil_type, levels = c("C","O","A", "L", "IC", "IA"))

Plot Beta

beta_plot_brite <- ggplot(data = nmds_scores) +
  ggtitle("Beta-diversity by Soil Type") +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_mark_ellipse(aes(x=NMDS1,y=NMDS2,fill= soil_type, color= soil_type), expand = unit(0.5,"mm")) +
  geom_point(aes(x=NMDS1, y=NMDS2, shape = soil_type)) +
  labs(fill = "Land Cover Type", shape = "Land Cover Type", color = "Land Cover Type")

beta_plot_brite

#ggsave("~/BecketLab_R/Soil/Soil Paper/output/func_beta.pdf", width = 8, height = 5, limitsize = FALSE)

Beta Stats (Anosim + PERMANOVA)

anosim(otu_m, samdf$Land.cov, distance = "euclidean", permutations = 99)
## 
## Call:
## anosim(x = otu_m, grouping = samdf$Land.cov, permutations = 99,      distance = "euclidean") 
## Dissimilarity: euclidean 
## 
## ANOSIM statistic R: 0.1599 
##       Significance: 0.01 
## 
## Permutation: free
## Number of permutations: 99
adonis2(formula = otu_m ~ samdf$Land.cov, permutations = 99, method = "euclidean")
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 99
## 
## adonis2(formula = otu_m ~ samdf$Land.cov, permutations = 99, method = "euclidean")
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     5   119532 0.15822 1.2782   0.01 **
## Residual 34   635934 0.84178                 
## Total    39   755466 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

CCA Preprocessing

cca_driver <- subset(nmds_scores, select = c("pH", "Soil_H20", "Bulk.D", "NO3", "NH4", "LOI", "N", "C"))
data_ps <- data.frame(t(brite_ps@otu_table))
cca_ps <- cca(data_ps, cca_driver)

#plot(cca_ps)
fort_cca <- fortify(cca_ps)

sites <- which(fort_cca$score == "sites")
biplot <- which(fort_cca$score == "biplot")

fort_cca1 <- fort_cca[sites,]
drivers <- fort_cca[biplot,]

gg_soil_cca <- cbind(fort_cca1, subset(samdf, select = c("Land.cov")))

##
gg_soil_cca$Land.cov <- factor(gg_soil_cca$Land.cov, levels = c("C","O","A", "L", "IC", "IA"))

Plot CCA - Pathways

cca_gg <- ggplot(gg_soil_cca) +
  ggtitle("CCA1 vs CCA2") +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_point(aes(x = CCA1, y = CCA2 , color = factor(Land.cov), shape = Land.cov)) +
  geom_segment(data = drivers, aes(x = 0, y = 0, xend = 3*CCA1, yend = 3*CCA2), arrow = arrow(length = unit(0.1, "cm")), linewidth =0.5) +
  geom_text(data = drivers, aes(x = 3.3*CCA1, y = 3.3*CCA2, label = label)) +
  labs(color = "Land Cover Type", shape = "Land Cover Type")

cca_gg

ggsave("~/BecketLab_R/Soil/Soil Paper/output/func_cca.pdf", width = 8, height = 5, limitsize = FALSE)

LEfse

default_colors <- scales::hue_pal()(6)


cust_default_colors <- default_colors[c(1,2,4)]
soil_t1 <- trans_diff$new(dataset = soil_func_microtable, method = "lefse", group = "Land.cov", taxa_level = "kegg_pathway_name")
soil_t1$res_diff$Group <- factor(soil_t1$res_diff$Group, levels = c("C","O","A", "L", "IC", "IA"))

soil_g1 <- soil_t1$plot_diff_bar(color_values = cust_default_colors)
soil_g1$labels$colour <- "Land Cover Type"
soil_g1$labels$fill <- "Land Cover Type"
soil_g1$labels$group <- "Land Cover Type"


soil_g2 <- soil_t1$plot_diff_abund(color_values = default_colors)

soil_g2 <- soil_g2 + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank(), panel.border = element_blank())
soil_g2$labels$colour <- "Land Cover Type"



soil_g1 <- soil_g1 + theme(axis.text.y = element_text(size = 20))
soil_g2 <- soil_g2 

soil_p <- soil_g1 %>% aplot::insert_right(soil_g2, width=3)
soil_p

soil_g1

soil_g2

#ggsave("~/BecketLab_R/Soil/Soil Paper/output/func_lefse.pdf", width = 10, height = 15, limitsize = FALSE)

Pearson Correlation

soil_t2 <- trans_diff$new(dataset = soil_func_microtable, method = "lefse", group = "Land.cov", taxa_level = "kegg_pathway_name")
# then create trans_env object
soil_t1 <- trans_env$new(dataset = soil_func_microtable, add_data = soil_func_microtable$sample_table[,c(2:9)])
# use other_taxa to select taxa you need
soil_t1$cal_cor(use_data = "other", p_adjust_method = "fdr", other_taxa = soil_t2$res_diff$Taxa[1:40])

soil_t1$plot_cor(pheatmap = FALSE)

#ggsave("~/BecketLab_R/Soil/Soil Paper/output/func_pearsonCor.pdf", width = 10, height = 5, limitsize = FALSE)

Additional Stats/Figures

Taxa: Alpha Kruskal Wallis Pairwise Comparison Testing

# Step 1: Extract richness data
richness_df <- estimate_richness(s_ps, measures = c("Shannon", "Simpson"))
richness_df$Land.cov <- sample_data(s_ps)$Land.cov

# Step 2: Perform Kruskal-Wallis test
kruskal_result_shan <- kruskal.test(Shannon ~ Land.cov, data = richness_df)
kruskal_result_simp <- kruskal.test(Simpson ~ Land.cov, data = richness_df)
print(kruskal_result_shan)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Shannon by Land.cov
## Kruskal-Wallis chi-squared = 17.673, df = 5, p-value = 0.003385
print(kruskal_result_simp)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Simpson by Land.cov
## Kruskal-Wallis chi-squared = 15.779, df = 5, p-value = 0.007503
# Step 3: Perform pairwise Wilcoxon test (equivalent to pairwise Kruskal-Wallis)
pairwise_test <- pairwise.wilcox.test(
    richness_df$Shannon,
    richness_df$Land.cov,
    p.adjust.method = "BH"    # Benjamini-Hochberg correction for multiple testing
)

pairwise_test2 <- pairwise.wilcox.test(
    richness_df$Simpson,
    richness_df$Land.cov,
    p.adjust.method = "BH"    # Benjamini-Hochberg correction for multiple testing
)
print(pairwise_test)
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  richness_df$Shannon and richness_df$Land.cov 
## 
##    C     O     A     L     IC   
## O  0.832 -     -     -     -    
## A  0.083 0.418 -     -     -    
## L  0.878 0.855 0.429 -     -    
## IC 0.024 0.024 0.083 0.035 -    
## IA 0.024 0.024 0.035 0.024 0.429
## 
## P value adjustment method: BH
print(pairwise_test2)
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  richness_df$Simpson and richness_df$Land.cov 
## 
##    C     O     A     L     IC   
## O  0.798 -     -     -     -    
## A  0.745 0.321 -     -     -    
## L  0.772 0.552 0.521 -     -    
## IC 0.071 0.030 0.321 0.136 -    
## IA 0.030 0.030 0.048 0.030 0.122
## 
## P value adjustment method: BH

Func: Alpha Kruskal Wallis Pairwise Comparison Testing

# Step 1: Extract richness data
richness_df <- estimate_richness(brite_ps, measures = c("Shannon", "Simpson"))
richness_df$Land.cov <- sample_data(brite_ps)$Land.cov

# Step 2: Perform Kruskal-Wallis test
kruskal_result_shan <- kruskal.test(Shannon ~ Land.cov, data = richness_df)
kruskal_result_simp <- kruskal.test(Simpson ~ Land.cov, data = richness_df)
print(kruskal_result_shan)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Shannon by Land.cov
## Kruskal-Wallis chi-squared = 8.2619, df = 5, p-value = 0.1424
print(kruskal_result_simp)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Simpson by Land.cov
## Kruskal-Wallis chi-squared = 4.5814, df = 5, p-value = 0.4691
# Step 3: Perform pairwise Wilcoxon test (equivalent to pairwise Kruskal-Wallis)
pairwise_test <- pairwise.wilcox.test(
    richness_df$Shannon,
    richness_df$Land.cov,
    p.adjust.method = "BH"    # Benjamini-Hochberg correction for multiple testing
)

pairwise_test2 <- pairwise.wilcox.test(
    richness_df$Simpson,
    richness_df$Land.cov,
    p.adjust.method = "BH"    # Benjamini-Hochberg correction for multiple testing
)
print(pairwise_test)
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  richness_df$Shannon and richness_df$Land.cov 
## 
##    C    O    A    L    IC  
## O  0.93 -    -    -    -   
## A  0.93 1.00 -    -    -   
## L  0.18 0.18 0.19 -    -   
## IC 0.93 1.00 0.93 0.18 -   
## IA 0.92 1.00 0.93 0.18 1.00
## 
## P value adjustment method: BH
print(pairwise_test2)
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  richness_df$Simpson and richness_df$Land.cov 
## 
##    C    O    A    L    IC  
## O  0.71 -    -    -    -   
## A  0.96 0.86 -    -    -   
## L  0.98 0.71 0.71 -    -   
## IC 0.86 1.00 1.00 0.71 -   
## IA 0.71 1.00 0.98 0.71 1.00
## 
## P value adjustment method: BH

Taxa: Pearson Correlation grouped by Land Cover Type

t2 <- trans_diff$new(dataset = soil_microtable, method = "lefse", group = "Land.cov", taxa_level = "Phylum")
# then create trans_env object
t1 <- trans_env$new(dataset = soil_microtable, add_data = soil_microtable$sample_table[,c(2:9)])
# use other_taxa to select taxa you need
t1$cal_cor(by_group = "Land.cov",use_data = "other", p_adjust_method = "fdr", other_taxa = t2$res_diff$Taxa[1:40])
t1$res_cor$by_group <- factor(t1$res_cor$by_group, levels = c("C","O","A", "L", "IC", "IA"))

t1$plot_cor(pheatmap = FALSE)

#ggsave("~/BecketLab_R/Soil/Soil Paper/output/sup_taxa_pearsonCor.pdf", width = 20, height = 10, limitsize = FALSE)

Func: Pearson Correlation grouped by Land Cover Type

soil_t2 <- trans_diff$new(dataset = soil_func_microtable, method = "lefse", group = "Land.cov", taxa_level = "kegg_pathway_name")
# then create trans_env object
soil_t1 <- trans_env$new(dataset = soil_func_microtable, add_data = soil_func_microtable$sample_table[,c(2:9)])
# use other_taxa to select taxa you need
soil_t1$cal_cor(by_group = "Land.cov",use_data = "other", p_adjust_method = "fdr", other_taxa = soil_t2$res_diff$Taxa[1:40])
soil_t1$res_cor$by_group <- factor(soil_t1$res_cor$by_group, levels = c("C","O","A", "L", "IC", "IA"))

soil_t1$plot_cor(pheatmap = FALSE)

#ggsave("~/BecketLab_R/Soil/Soil Paper/output/sup_func_pearsonCor.pdf", width = 20, height = 10, limitsize = FALSE)

Feature Importance: Taxanomic Metadata - Land Cover Type

set.seed(145)
soil_t1 <- trans_env$new(dataset = soil_microtable, add_data = soil_microtable$sample_table[,c(2:9)])
soil_tmp <- soil_t1$data_env %>% t %>% as.data.frame
# caret package should be installed
soil_t2 <- trans_classifier$new(dataset = soil_microtable, x.predictors = soil_tmp, y.response = "Land.cov")
soil_t2$cal_preProcess(method = c("center", "scale", "nzv"))
# All samples are used in training if cal_split function is not performed
soil_t2$cal_train(method = "rf")

# generate significance with rfPermute package
# require rfPermute package to be installed
soil_t2$cal_feature_imp(rf_feature_sig = TRUE)

soil_t2$plot_feature_imp(show_sig_group = TRUE, coord_flip = TRUE, width = 0.6, add_sig = TRUE)

#ggsave("~/BecketLab_R/Soil/Soil Paper/output/sup_taxa_meta_randomforest.pdf", width = 10, height = 10, limitsize = FALSE)